
/* Libref Q points at the folder that holds the permanent SAS data sets. */
libname q 'c:\SASCodeforBook\SASDataSets';

/* Copy the permanent table q.houstonpropensityscores2 into the temporary
   data set FIRST; the data step that builds SECOND reads this copy. */
data First;
    set q.houstonpropensityscores2;
run;
/*Note: Lamar Elementary was a Conect school for only a year or so.  It is included in these
regressions but not in the production runs.  q.propensity2 does not contain the gpropensity2
scores based on ordinary least squares (OLS) regression of the treatment indicator on the predictors.
This data set is used to make these scores and also the gpropensity3 scores.
The dataset HoustonSchools contains these Goldbergerized "G" scores based on OLS and Firth
logistic regression. That dataset also contains the original propensity scores based on
another predictor set. The code below was updated with Firth on April 20, 2009 and checked in
September 2010.  In order for the logistic regression model to estimate properly, the data must
be modified following Goldberger's procedure, and then Firth logistic regression is applied to
the Goldbergerized data. This code will produce the propensity scores presented in Table 12.5.*/


 /* SECOND = FIRST restricted to the identifiers (group4, school), the
    treatment indicator (conect), and the predictor set used by the models. */
 data second (keep = group4 school conect
                     grad tot_students LEP_ whitprob migrants_count
                     maleprob mobility_ locale native__american_count
                     reduprob lowgrade asians_islanders_count
                     exemplary recognized unacceptable junk);
     set first;
 run;

 title 'Ordinary least squares regression prior to Goldberger corrections to data';

 /* First-stage OLS regression of the treatment indicator on the untransformed
    predictors.  Data set B receives the input variables plus the fitted
    value (yhat) and the residual (yresid). */
 proc reg data=second;
     model Conect = grad tot_students LEP_ whitprob migrants_count
                    maleprob mobility_ locale native__american_count
                    reduprob lowgrade asians_islanders_count
                    exemplary recognized unacceptable junk
           / stb tol vif;
     output out=b
            predicted=yhat
            residual=yresid;
 run;

/* Goldbergerize the predictors: every predictor (and the constant 1) is divided
   by s = sqrt(p*(1-p)), where p is the fitted value from the first-stage OLS
   regression stored in data set B.

   Fix: PROC REG writes the fitted value to B under the name yhat
   (output out=b p=yhat), and B contains no variable called p.  Without the
   assignment below p is uninitialized (missing); because SAS orders a missing
   value below every number, the "p le 0" test was true on every row, p became
   .01 everywhere, and s was a single constant instead of a per-school weight. */
data bob1;
    set b;

    /* Start from the first-stage fitted probability. */
    p = yhat;

    /* Pull fitted values outside the open interval (0,1) back inside so that
       p*(1-p) stays positive.  A missing yhat also satisfies "p le 0" and is
       therefore set to .01. */
    if p ge 1 then p = .99;
    if p le 0 then p = .01;

    q = 1 - p;
    s = sqrt(p*q);

    /* oneovers later replaces the intercept in the no-intercept regressions. */
    oneovers  = 1/s;

    /* Predictors rescaled by 1/s. */
    grados    = grad/s;
    totstudos = tot_students/s;
    LEP_os    = LEP_/s;
    whiteos   = whitprob/s;
    migrntos  = migrants_count/s;
    maleos    = maleprob/s;
    mobilos   = mobility_/s;
    localos   = locale/s;
    naos      = native__american_count/s;
    reduos    = reduprob/s;
    lgrados   = lowgrade/s;
    asianos   = asians_islanders_count/s;
    exempos   = exemplary/s;
    recogos   = recognized/s;
    unacptos  = unacceptable/s;
    junkos    = junk/s;
run;

/* BOB2 holds only the identifiers, the treatment indicator, and the
   Goldbergerized predictors read from BOB1. */
data bob2;
    set bob1 (keep = group4 school conect
                     oneovers
                     grados totstudos LEP_os whiteos migrntos maleos
                     mobilos localos naos reduos lgrados asianos
                     exempos recogos unacptos junkos);
run;

title 'Ordinary least squares regression after Goldberger corrections to data';

/* Second-stage OLS on the rescaled data.  oneovers (one over s) takes the
   place of the intercept, which is why the model is fitted with NOINT.
   Data set B2 receives the fitted value (yhat) and the residual (yresid). */
proc reg data=bob2;
    model conect = oneovers
                   grados totstudos LEP_os whiteos migrntos maleos
                   mobilos localos naos reduos lgrados asianos
                   exempos recogos unacptos junkos
          / noint tol vif stb;
    output out=b2
           predicted=yhat
           residual=yresid;
run;

/* yhat in B2 is the Goldbergerized OLS propensity score, referred to later
   as GPropensity2.  BOB3 is B2 ordered by group4 so that the mean of yhat
   can be reported separately for each group. */
proc sort data=b2 out=bob3;
    by group4;
run;

proc means data=bob3;
    by group4;
    var yhat;
run;

/* BOB4: score plus identifiers for the rows whose group4 lies between 1 and 2
   inclusive; those rows are printed. */
data bob4;
    set bob3 (keep = yhat group4 school conect);
    if group4 >= 1 and group4 <= 2;
run;

title 'For Table 9.5: OLS gpropensity2 scores for schools: Conect=1, comparison group =0 ';
proc print data=bob4;
run;


/* Experimental: Firth-corrected logistic regression fitted to SECOND, which
   holds the untransformed (not Goldbergerized) predictors.  According to the
   author's notes this model does not converge, so the estimates in BETAS and
   the phat scores in PRED are not valid.

   Fix: the title used to report 10,000 iterations and was missing its closing
   parenthesis, while the MODEL statement limits the fit to MAXITER=100.  The
   title now states the limit that is actually in force. */
title 'Logistic Regression on Untransformed Data (Does not converge after 100 iterations)';
proc logistic data=second outest=betas covout;
    model Conect (event = '1') = Grad tot_students lep_ Whitprob migrants_count
                                 maleprob mobility_ locale Native__American_count
                                 reduprob lowgrade asians_islanders_count
                                 exemplary recognized unacceptable junk
          / maxiter=100 aggregate scale=none
            rsquare lackfit stb firth;
    /* PRED receives the predicted probability (phat) and its confidence limits. */
    output out=pred p=phat lower=lcl upper=ucl;
run;

title 'Invalid parameter estimates from logistic regression (Firth, no Goldberger corrections)';
proc print data=betas;
run;

 /* BOBL: phat from the untransformed Firth fit plus identifiers, ordered by
    group4 so that PROC MEANS can summarize the numeric variables by group. */
 proc sort data=pred (keep = phat group4 school conect) out=bobL;
     by group4;
 run;

 proc means data=bobL;
     by group4;
 run;

 /* BOBLL: the rows of BOBL whose group4 lies between 1 and 2 inclusive. */
 data bobLL;
     set bobL;
     if group4 >= 1 and group4 <= 2;
 run;

 proc print data=bobLL;
 run;
  
/* Put BOB4 (yhat) and BOBLL (phat) in school order, match them by school,
   and correlate the two sets of propensity scores. */
proc sort data=bob4 out=mergetry1;
    by school;
run;

proc sort data=bobLL out=mergetry2;
    by school;
run;

title 'Phat is from logistic regression only with Firth, yhat is from Goldbergerized OLS';

data mergertry3;
    merge mergetry1 mergetry2;
    by school;
run;

proc corr data=mergertry3;
    var phat yhat;
run;
/*Note: The above phat propensities based on the unadjusted data are not correctly estimated*/

/* Firth-corrected logistic regression fitted to BOB2, the Goldbergerized
   predictors.  The author's notes state that this model converges.
   oneovers is in the predictor list and the model is fitted with NOINT.
   BETAS receives the estimates and their covariances (COVOUT); PRED receives
   the predicted probability (gphat) and its confidence limits. */
title 'Firth corrected logistic regression using Goldberized Predictors';
proc logistic data=bob2 outest=betas covout;
    model Conect (event = '1') = oneovers
                                 grados totstudos LEP_os whiteos migrntos maleos
                                 mobilos localos naos reduos lgrados asianos
                                 exempos recogos unacptos junkos
          / maxiter=20 aggregate scale=none
            noint rsquare lackfit stb firth;
    output out=pred predicted=gphat lower=lcl upper=ucl;
run;

title 'For Table 9.5: parameter estimates from Goldberger-Firth Logistic Regression';
proc print data=betas;
run;

 /* GBOBL: gphat from the Goldberger + Firth fit plus identifiers, ordered by
    group4 so that PROC MEANS can summarize the numeric variables by group. */
 proc sort data=pred (keep = gphat group4 school conect) out=GbobL;
     by group4;
 run;

 proc means data=GbobL;
     by group4;
 run;

 /* GBOBLL: the rows of GBOBL whose group4 lies between 1 and 2 inclusive. */
 data GbobLL;
     set GbobL;
     if group4 >= 1 and group4 <= 2;
 run;

 proc print data=GbobLL;
 run;
  
/* Bring the three sets of propensity scores together by school and correlate them:
     gphat - Goldbergerized + Firth logistic scores (from GBOBLL)
     yhat  - Goldbergerized OLS scores              (from BOB4)
     phat  - Firth-only scores from the model that did not converge
             (from MERGETRY2, which an earlier step already sorted by school) */
proc sort data=bob4 out=Gmergetry1;
    by school;
run;

proc sort data=GbobLL out=Gmergetry2;
    by school;
run;

data Gmergertry3;
    merge Gmergetry1 Gmergetry2 mergetry2;
    by school;
run;

title 'gphat= goldberger+Firth, yhat=Goldberger + OLS, phat = invalid estimates';
proc corr data=Gmergertry3;
    var gphat yhat phat;
run;

/* Note: Use GPropensity3 scores: Goldberger and Firth corrected*/ 
/* However, the GPropensity2 scores could also be used*/ 
/* The code for the propensity score creation is now complete*/

/* The syntax in this file is copyright by Robert B. Smith,  2011. */
